001 /* 002 * Factor.java 003 * 004 * Copyright 2003 Sergio Anibal de Carvalho Junior 005 * 006 * This file is part of NeoBio. 007 * 008 * NeoBio is free software; you can redistribute it and/or modify it under the terms of 009 * the GNU General Public License as published by the Free Software Foundation; either 010 * version 2 of the License, or (at your option) any later version. 011 * 012 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 013 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 014 * PURPOSE. See the GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License along with NeoBio; 017 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, 018 * Boston, MA 02111-1307, USA. 019 * 020 * Proper attribution of the author as the source of the software would be appreciated. 021 * 022 * Sergio Anibal de Carvalho Junior mailto:sergioanibaljr@users.sourceforge.net 023 * Department of Computer Science http://www.dcs.kcl.ac.uk 024 * King's College London, UK http://www.kcl.ac.uk 025 * 026 * Please visit http://neobio.sourceforge.net 027 * 028 * This project was supervised by Professor Maxime Crochemore. 029 * 030 */ 031 032 package neobio.alignment; 033 034 /** 035 * This class is used by {@linkplain FactorSequence} to create a linked list of factors of 036 * a text as induced by its Lempel-Ziv (LZ78) factorisation. 037 * 038 * <P>Each instance of this class represent a string composed of its an ancestor factor's 039 * string plus one character, and contains: 040 * 041 * <UL> 042 * <LI>a pointer to its ancestor factor (the longest factor previously seen in the text 043 * during its LZ78 factorisation); 044 * <LI>the new character; 045 * <LI>a serial number (which represents its order in the text) 046 * <LI>a pointer to the next factor of the text 047 * <LI>its length (number of characters, which is equal to its ancestor's length plus one) 048 * </UL> 049 * 050 * @author Sergio A. de Carvalho Jr. 051 * @see FactorSequence 052 */ 053 public class Factor 054 { 055 /** 056 * A pointer to this factor's ancestor, which represents a prefix of this factor's 057 * text. 058 */ 059 protected Factor ancestor; 060 061 /** 062 * A pointer to the next factor. 063 */ 064 protected Factor next; 065 066 /** 067 * This factor's serial number, which indicates the order of this factor inside the 068 * linked list of factors of a text. 069 */ 070 protected int serial_number; 071 072 /** 073 * The number of characters of the text represented by this factor. 074 */ 075 protected int length; 076 077 /** 078 * The new character of this factor. 079 */ 080 protected char new_char; 081 082 /** 083 * Creates a new empty <CODE>Factor</CODE>. It has no ancestor and no character (both 084 * are set to <CODE>null</CODE>). Its serial number is set to zero as well as its 085 * length. 086 * 087 * <P>This constructor is used to initiate the a linked list of factors of a text. Its 088 * <CODE>next</CODE> pointer is initially <CODE>null</CODE>, but it is typically set 089 * to point to the first factor afterwards (with the <CODE>setNext</CODE> method). 090 * 091 * @see #setNext 092 */ 093 public Factor () 094 { 095 this.ancestor = null; 096 this.next = null; 097 this.serial_number = 0; 098 this.length = 0; 099 this.new_char = 0; 100 } 101 102 /** 103 * Creates a new <CODE>Factor</CODE> instance with the specified serial number and 104 * new character, and pointing to the given ancestor. Its length is set to its 105 * ancestor's length plus 1. 106 * 107 * <P>Its <CODE>next</CODE> pointer is initially <CODE>null</CODE>, but it is 108 * typically set to point to the next factor afterwards (with the <CODE>setNext</CODE> 109 * method). 110 * 111 * @param ancestor this factor's ancestor 112 * @param serial_number this factor's serial number 113 * @param new_char this factor's new character 114 * @see #setNext 115 */ 116 public Factor (Factor ancestor, int serial_number, char new_char) 117 { 118 this.ancestor = ancestor; 119 this.serial_number = serial_number; 120 this.new_char = new_char; 121 if (ancestor != null) 122 this.length = ancestor.length() + 1; 123 else 124 throw new IllegalArgumentException ("Ancestor factor cannot be null."); 125 } 126 127 /** 128 * Sets this factor's <CODE>next</CODE> pointer to point to the specified factor. 129 * Although the next factor has typically a serial number equal to this factor's 130 * serial number plus 1, no attempt is made to guarantee this rule. This allows 131 * special constructs or a different order in the factorisation. 132 * 133 * @param next the factor that will be pointed to 134 * @see #getNext 135 */ 136 public void setNext (Factor next) 137 { 138 this.next = next; 139 } 140 141 /** 142 * Returns this factor's ancestor factor. 143 * 144 * @return this factor's ancestor factor 145 */ 146 public Factor getAncestor () 147 { 148 return ancestor; 149 } 150 151 /** 152 * This method is a shorthand to return the serial number of this factor's ancestor. 153 * Note that it does not check if this factor has an ancestor or not, therefore, if 154 * it is called on the root factor, a NullPointerException is raised. 155 * 156 * @return the serial number of this factor's ancestor 157 */ 158 public int getAncestorSerialNumber () 159 { 160 return ancestor.getSerialNumber(); 161 } 162 163 /** 164 * Returns this factor's next factor. 165 * 166 * @return this factor's next factor 167 * @see #setNext 168 */ 169 public Factor getNext () 170 { 171 return next; 172 } 173 174 /** 175 * Returns this factor's serial number. 176 * 177 * @return this factor's serial number 178 */ 179 public int getSerialNumber () 180 { 181 return serial_number; 182 } 183 184 /** 185 * Returns this factor's length. 186 * 187 * @return this factor's length 188 */ 189 public int length () 190 { 191 return length; 192 } 193 194 /** 195 * Returns this factor's new character. 196 * 197 * @return this factor's new character 198 */ 199 public char getNewChar () 200 { 201 return new_char; 202 } 203 204 /** 205 * Returns a string representation of the text represented by this factor. It inspects 206 * its chain of ancestors up until as far as the root factor, spelling their new 207 * characters out. 208 * 209 * @return a string representation of the text denoted by this factor 210 */ 211 public String toString () 212 { 213 StringBuffer buf = new StringBuffer(); 214 Factor ancestor = this; 215 216 while (ancestor.getAncestor() != null) 217 { 218 buf.insert(0, ancestor.getNewChar()); 219 ancestor = ancestor.getAncestor(); 220 } 221 222 return buf.toString(); 223 } 224 }